Hierarchical models

QAECO coding & math club, May 31, 2018

An example

Single regression line

Known sources of variation

Variation in intercepts

Variation in intercepts and slopes

In R: mixed models

# load lme4 package
library(lme4)

# fit model with single intercept and slope
mod_lm <- lm(size ~ temp)

# fit model with random intercepts
mod_int <- lmer(size ~ temp + (1 | species))

# fit model with random intercepts and slopes
mod_slope <- lmer(size ~ temp + (1 + temp | species))

In R: mixed models

# print the fixed effects
fixef(mod_int)
## (Intercept)        temp 
## 100.0135534  -0.3118507
# print the random effects
ranef(mod_int)
## $species
##   (Intercept)
## 1    1.387692
## 2    5.573009
## 3   -2.021700
## 4   -1.574244
## 5   -3.364757

In R: mixed models

# print the fixed effects
fixef(mod_slope)
## (Intercept)        temp 
##  99.9562825  -0.2379978
# print the random effects
ranef(mod_slope)
## $species
##   (Intercept)       temp
## 1    1.317356 -1.4299245
## 2    5.629131 -0.7534335
## 3   -2.060671  0.7314980
## 4   -1.563628 -0.3570966
## 5   -3.322187  1.8089566

In R: a Bayesian approach

# load rstanarm package
library(rstanarm)

# create a data set
data_set <- data.frame(size = size, temp = temp)

# fit model with single intercept and slope
mod_lm_bayes <- stan_glm(size ~ temp,
                         data = data_set,
                         family = "gaussian",
                         iter = 200, ## not a realistic value
                         chains = 2)

# fit model with random intercepts
mod_int_bayes <- stan_lmer(size ~ temp + (1 | species),
                           data = data_set,
                           iter = 200, ## not a realistic value
                           chains = 2)

# fit model with random intercepts and slopes
mod_slope_bayes <- stan_lmer(size ~ temp + (1 + temp | species),
                             data = data_set,
                             iter = 200, ## not a realistic value
                             chains = 2)

# explore model outputs
# launch_shinystan(mod_slope_bayes)

In R: a Bayesian approach

# print the model coefficients
coef(mod_slope)
## $species
##   (Intercept)       temp
## 1   101.27364 -1.6679223
## 2   105.58541 -0.9914312
## 3    97.89561  0.4935003
## 4    98.39265 -0.5950944
## 5    96.63410  1.5709588
## 
## attr(,"class")
## [1] "coef.mer"

Level 2: models within models

\[\begin{equation} y_i = \alpha + \beta \: X_i + \epsilon_i \\ \end{equation}\]
\[\begin{equation} y_i = \alpha_{sp_i} + \beta_{sp_i} \: X_i + \epsilon_i \\ \alpha_{sp_i} = \alpha_{mean} + \epsilon_{sp} \\ \beta_{sp_i} = \beta_{mean} + \delta_{sp} \\ \end{equation}\]

Level 2: models within models

\[\begin{equation} y_i = \alpha + \beta \: X_i + \epsilon_i \\ \end{equation}\] \[\begin{equation} y_i = \alpha(z_i) + \beta(z_i) \: X_i + \epsilon_i \\ \alpha(z_i) = \delta + \gamma \: z_i \\ \beta(z_i) = \phi + \psi \: z_i \\ \end{equation}\]

Example 1: trait-mediated responses

\[\begin{equation} n_i = \alpha + \beta(t_i) \: x_i + \epsilon_i \\ \beta(t_i) = \phi + \psi \: t_i \\ \end{equation}\]

Example 2: sharing information among species

\[\begin{equation} n_i = \alpha_{sp_i} + \beta_{sp_i} \: X_i + \epsilon_i \\ \beta_{sp_i} = \beta_{mean} + \epsilon_{sp} \\ \end{equation}\]

Example 3: occupancy-detection model

\[\begin{equation} y_{ij} \sim Bernoulli(p_{ij} \: z_{i}) \\ logit^{-1}(p_{ij}) = \alpha + f(x_{ij}) \\ z_{i} \sim Bernoulli(\psi_{i}) \\ logit^{-1}(\psi_{i}) = \gamma + g(w_{i}) \\ \end{equation}\]

greta example

# standardise variables
size_std <- scale(size)
temp_std <- scale(temp)

# define priors
alpha <- normal(mean = 0.0, sd = 1.0)
beta <- normal(mean = 0.0, sd = 1.0)
sigma_main <- lognormal(meanlog = 0.0, sdlog = 1.0)

# define linear predictor
mu <- alpha + beta * temp_std

# set likelihood
distribution(size_std) <- normal(mu, sigma_main)

# compile model
mod <- model(alpha, beta, sigma_main)

# plot model
plot(mod)

# sample from model
samples <- mcmc(mod, n_samples = 2000)

greta example

greta example

##      alpha       beta sigma_main 
##       0.00      -0.06       1.00

greta example

# define hyperpriors - means
alpha_mean <- normal(mean = 0.0, sd = 1.0)
beta_mean <- normal(mean = 0.0, sd = 1.0)

# define hyperpriors - SDs
sigma_alpha <- lognormal(meanlog = 0.0, sdlog = 1.0)
sigma_beta <- lognormal(meanlog = 0.0, sdlog = 1.0)

# define priors
alpha <- normal(mean = alpha_mean, sd = sigma_alpha, dim = n_sp)
beta <- normal(mean = beta_mean, sd = sigma_beta, dim = n_sp)
sigma_main <- lognormal(meanlog = 0.0, sdlog = 1.0)

# define linear predictor
mu <- alpha[species] + beta[species] * temp_std

# set likelihood
distribution(size_std) <- normal(mean = mu, sd = sigma_main)

# define model
mod <- model(alpha, beta, sigma_main)

# sample from model
samples <- mcmc(mod, n_samples = 2000)

greta example

greta example

greta example

##           alpha       beta
## [1,]  0.2814064 -0.3739673
## [2,]  1.2455647 -0.1928839
## [3,] -0.4769317  0.1025118
## [4,] -0.3646593 -0.1483228
## [5,] -0.7629544  0.3481009

Stan example

data {
  int<lower=0> n;
  int<lower=0> nsp;
  vector[n] ind_size;
  vector[n] temp;
  int species[n];
}

parameters {
  real alpha_mean;
  real beta_mean;
  vector[nsp] alpha;
  vector[nsp] beta;
  real<lower=0> sigma_alpha;
  real<lower=0> sigma_beta;
  real<lower=0> sigma_main;
}

transformed parameters {
  vector[n] mu;
  
  for (i in 1:n) {
    mu[i] = alpha[species[i]] + beta[species[i]] * temp[i];
  }
}

model {
  
  // model likelihood
  ind_size ~ normal(mu, sigma_main);
  
  // exchangeable priors for varying parameters
  alpha ~ normal(alpha_mean, sigma_alpha);
  beta ~ normal(beta_mean, sigma_beta);

  // overall mean parameters  
  alpha_mean ~ normal(0.0, 1.0);
  beta_mean ~ normal(0.0, 1.0);
  
  // variance terms
  sigma_alpha ~ normal(0.0, 1.0);
  sigma_beta ~ normal(0.0, 1.0);
  sigma_main ~ normal(0.0, 1.0);
  
}

Stan example

# can use the rstan package
library(rstan)

# create a list with all data
data_set <- list(ind_size = size,
                 temp = temp,
                 species = species,
                 n = length(ind_size),
                 nsp = length(unique(species)))

# compile the Stan model (a separate text file in this example)
mod <- stan_model(file = "./stan-model-file.stan")

# sample from the compiled model
samples <- sampling(mod,
                    data = data_set,
                    iter = 2000,
                    chains = 4)

# look at the model outputs
plot(samples)

JAGS example

model {
  
  for (i in 1:n) {
    
    ind_size[i] ~ normal(mu[i], tau_main) 
    mu[i] <- alpha[species[i]] + beta[species[i]] * temp[i]
    
  }
  
  tau_main <- pow(sd_main, -2)
  sd_main ~ dunif(0, 10)
  
  for (i in 1:nsp) {
    
    alpha[i] ~ normal(alpha_mean, tau_alpha)
    beta[i] ~ normal(beta_mean, tau_beta)
    
  }
  
  alpha_mean ~ normal(0.0, 1.0)
  beta_mean ~ normal(0.0, 1.0)
  
  tau_alpha <- pow(sd_alpha, -2)
  sd_alpha ~ dunif(0, 10)
  tau_beta <- pow(sd_beta, -2)
  sd_beta ~ dunif(0, 10)
  
  
}

JAGS example

# can use the rjags package, the R2jags package, or jagsUI
library(jagsUI)

# create a list with all data
data_set <- list(ind_size = size,
                 temp = temp,
                 species = species,
                 n = length(ind_size),
                 nsp = length(unique(species)))

# fit a model
mod <- jags(data = data_set,
            model.file = "./jags-model-file.txt",
            n.iter = 2000,
            n.chains = 3)

# explore the fitted model
plot(mod)